Setup

Load Libraries

library(ggplot2)
library(readxl)
library(dplyr)
library(scales)
library(forcats)

Set Paths

# Adjust paths based on where you run this from
data_path <- "../result data"
output_path <- "screenshots"

# Create output directory if it doesn't exist
if (!dir.exists(output_path)) {
  dir.create(output_path, recursive = TRUE)
}

KPI 1: Stress Index

Load Data

data_kpi1 <- read_excel(file.path(data_path, "quarter_stress_index.xlsx"))

# Preview
head(data_kpi1)
## # A tibble: 6 × 10
##   `District ID` `Quarter Name` `Population 2012` `Population 2025`
##   <chr>         <chr>                      <dbl>             <dbl>
## 1 1             City                         799               783
## 2 1             Hochschulen                  673               681
## 3 1             Lindenhof                    935              1011
## 4 1             Rathaus                     3164              3257
## 5 10            Höngg                      21537             24891
## 6 10            Wipkingen                  15791             16713
## # ℹ 6 more variables: `Traffic 2012` <dbl>, `Traffic 2025` <dbl>,
## #   `Population Growth Pct` <dbl>, `Traffic Growth Pct` <dbl>,
## #   `Stress Index Pct` <dbl>, `Stress Classification` <chr>
table(data_kpi1$`Stress Classification`)
## 
##             Balanced    Commuter pressure    Insufficient data 
##                   17                    8                    7 
## Residential pressure 
##                    2

Prepare Data

data_kpi1 <- data_kpi1 %>%
  mutate(
    has_complete_data = `Stress Classification` != "Insufficient data",
    classification_ordered = factor(`Stress Classification`, 
                                    levels = c("Residential pressure", 
                                              "Balanced", 
                                              "Commuter pressure",
                                              "Insufficient data"))
  )

Chart 1: Stress Index by Quarter

p_kpi1_quarter <- ggplot(data_kpi1, 
                         aes(x = reorder(`Quarter Name`, `Stress Index Pct`, na.rm = TRUE), 
                             y = `Stress Index Pct`, 
                             fill = classification_ordered)) +
  geom_col() +
  geom_hline(yintercept = 0, linetype = "solid", color = "black", linewidth = 0.5) +
  scale_fill_manual(
    values = c(
      "Residential pressure" = "#3A86FF",
      "Balanced" = "#90BE6D",
      "Commuter pressure" = "#F94144",
      "Insufficient data" = "#CCCCCC"
    ),
    name = "Classification"
  ) +
  labs(
    title = "Stress Index by Statistical Quarter",
    subtitle = "Traffic Growth % minus Population Growth % (2012-2025)\nPositive = Commuter Pressure | Negative = Residential Pressure",
    x = "Statistical Quarter",
    y = "Stress Index (%)",
    caption = "Grey bars indicate insufficient traffic data for reliable calculation"
  ) +
  coord_flip() +
  theme_minimal(base_size = 11) +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(size = 9, color = "grey30"),
    legend.position = "bottom",
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )

print(p_kpi1_quarter)

Chart 2: Average by District

# Aggregate by district
district_summary <- data_kpi1 %>%
  filter(has_complete_data) %>%
  group_by(`District ID`) %>%
  summarise(
    avg_stress = mean(`Stress Index Pct`, na.rm = TRUE),
    n_quarters = n(),
    .groups = "drop"
  ) %>%
  mutate(
    classification = case_when(
      avg_stress > 10 ~ "Commuter pressure",
      avg_stress < -10 ~ "Residential pressure",
      TRUE ~ "Balanced"
    ),
    classification = factor(classification, 
                           levels = c("Residential pressure", "Balanced", "Commuter pressure"))
  )

p_kpi1_district <- ggplot(district_summary, 
                          aes(x = factor(`District ID`), 
                              y = avg_stress, 
                              fill = classification)) +
  geom_col() +
  geom_hline(yintercept = c(-10, 10), linetype = "dashed", color = "grey50", linewidth = 0.5) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black", linewidth = 0.7) +
  scale_fill_manual(
    values = c(
      "Residential pressure" = "#3A86FF",
      "Balanced" = "#90BE6D",
      "Commuter pressure" = "#F94144"
    ),
    name = "Classification"
  ) +
  labs(
    title = "Average Stress Index by City District",
    subtitle = "Districts with complete traffic data | Dashed lines mark ±10% classification thresholds",
    x = "City District",
    y = "Average Stress Index (%)",
    caption = "Based on quarters with complete 2012 and 2025 traffic measurements"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 10, color = "grey30"),
    legend.position = "bottom",
    panel.grid.minor = element_blank()
  )

print(p_kpi1_district)

Save KPI 1

ggsave(file.path(output_path, "kpi1_stress_index_by_quarter.png"), 
       plot = p_kpi1_quarter, width = 10, height = 12, dpi = 300, bg = "white")

ggsave(file.path(output_path, "kpi1_stress_index_by_district.png"), 
       plot = p_kpi1_district, width = 10, height = 6, dpi = 300, bg = "white")

KPI 2: Traffic Bottlenecks

Load Data

data_kpi2 <- read_excel(file.path(data_path, "bottleneck_and_peakhour.xlsx"))

# Preview
head(data_kpi2)
## # A tibble: 6 × 4
##   `Counting Site Name`  `Peak Hour` `Avg Volume` Status    
##   <chr>                 <chr>              <dbl> <chr>     
## 1 Schwamendingenstrasse 17:00               2908 Bottleneck
## 2 Sihlfeldstrasse       17:00               1649 Bottleneck
## 3 Rosengartenstrasse    16:00               1563 Bottleneck
## 4 Rosengartenstrasse    16:00               1530 Bottleneck
## 5 Manessestrasse        17:00               1443 Bottleneck
## 6 Manessestrasse        17:00               1401 Bottleneck
table(data_kpi2$Status)
## 
## Bottleneck     Normal 
##         22         88

Chart 1: Top 20 Bottlenecks

bottlenecks <- data_kpi2 %>%
  filter(Status == "Bottleneck") %>%
  arrange(desc(`Avg Volume`)) %>%
  slice_head(n = 20) %>%
  mutate(
    peak_hour_num = as.numeric(substr(`Peak Hour`, 1, 2)),
    hour_category = case_when(
      peak_hour_num >= 6 & peak_hour_num < 10 ~ "Morning Rush (06-10)",
      peak_hour_num >= 10 & peak_hour_num < 15 ~ "Midday (10-15)",
      peak_hour_num >= 15 & peak_hour_num < 20 ~ "Evening Rush (15-20)",
      TRUE ~ "Other"
    ),
    hour_category = factor(hour_category, 
                          levels = c("Morning Rush (06-10)", 
                                    "Midday (10-15)", 
                                    "Evening Rush (15-20)", 
                                    "Other"))
  )

p_kpi2_top <- ggplot(bottlenecks, 
                     aes(x = reorder(`Counting Site Name`, `Avg Volume`), 
                         y = `Avg Volume`,
                         fill = hour_category)) +
  geom_col() +
  geom_text(aes(label = `Peak Hour`), hjust = -0.2, size = 3) +
  scale_fill_manual(
    values = c(
      "Morning Rush (06-10)" = "#F4A261",
      "Midday (10-15)" = "#E9C46A",
      "Evening Rush (15-20)" = "#E76F51",
      "Other" = "#264653"
    ),
    name = "Peak Hour Period"
  ) +
  scale_y_continuous(labels = comma, expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Top 20 Traffic Bottlenecks in Zurich",
    subtitle = "Sites with highest average peak-hour vehicle counts (2023-2025)",
    x = "",
    y = "Average Vehicles per Hour (Peak)",
    caption = "Threshold: 700 vehicles/hour | Peak hour shown on right side of bars"
  ) +
  coord_flip() +
  theme_minimal(base_size = 11) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 10, color = "grey30"),
    legend.position = "bottom",
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )

print(p_kpi2_top)

Chart 2: Bottlenecks by Hour

hour_distribution <- data_kpi2 %>%
  filter(Status == "Bottleneck") %>%
  mutate(peak_hour_num = as.numeric(substr(`Peak Hour`, 1, 2))) %>%
  count(peak_hour_num, name = "bottleneck_count")

p_kpi2_hour <- ggplot(hour_distribution, 
                      aes(x = factor(peak_hour_num), y = bottleneck_count)) +
  geom_col(fill = "#E76F51", alpha = 0.8) +
  geom_text(aes(label = bottleneck_count), vjust = -0.5, size = 3.5) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "When Do Bottlenecks Occur?",
    subtitle = paste0("Distribution of peak hours across all bottleneck sites (n = ", 
                     sum(data_kpi2$Status == "Bottleneck"), ")"),
    x = "Hour of Day",
    y = "Number of Bottleneck Sites",
    caption = "Data period: 2023-2025"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 10, color = "grey30"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

print(p_kpi2_hour)

Save KPI 2

ggsave(file.path(output_path, "kpi2_top_bottlenecks.png"), 
       plot = p_kpi2_top, width = 10, height = 8, dpi = 300, bg = "white")

ggsave(file.path(output_path, "kpi2_bottleneck_by_hour.png"), 
       plot = p_kpi2_hour, width = 10, height = 6, dpi = 300, bg = "white")

KPI 3: Directional Flow

Load Data

data_kpi3 <- read_excel(file.path(data_path, "street_flow_direction.xlsx"))

# Remove missing values
data_kpi3 <- data_kpi3 %>% filter(!is.na(`Dominance Share`))

# Preview
head(data_kpi3)
## # A tibble: 6 × 6
##   `Counting Site Name` `Dominant Direction` `Dominant Volume` `Total Volume`
##   <chr>                <chr>                            <dbl>          <dbl>
## 1 Saatlenstrasse       outbound                       1847633        1847633
## 2 Stauffacherquai      Hauptbahnhof                   6833618        6833618
## 3 Dreikönigstrasse     Talstrasse                     7724516        7724516
## 4 Sihlfeldstrasse      Manessestrasse                22879177       22879177
## 5 Bändliweg            Höngg/Europabrücke             1629245        1875278
## 6 Brunaustrasse        Enge                           3246405        3831529
## # ℹ 2 more variables: `Dominance Share` <dbl>, Classification <chr>
table(data_kpi3$Classification)
## 
##           Balanced intersection Moderate directional preference 
##                              10                              55 
##      Strong corridor dependency 
##                              17

Chart 1: Classification Distribution

classification_summary <- data_kpi3 %>%
  count(Classification) %>%
  mutate(
    percentage = n / sum(n) * 100,
    label = paste0(n, " sites\n(", round(percentage, 1), "%)")
  )

p_kpi3_pie <- ggplot(classification_summary, aes(x = "", y = n, fill = Classification)) +
  geom_col(width = 1, color = "white", linewidth = 1) +
  geom_text(aes(label = label), 
            position = position_stack(vjust = 0.5),
            size = 4, fontface = "bold", color = "white") +
  coord_polar(theta = "y") +
  scale_fill_manual(
    values = c(
      "Strong corridor dependency" = "#D62828",
      "Moderate directional preference" = "#F77F00",
      "Balanced intersection" = "#90BE6D"
    )
  ) +
  labs(
    title = "Directional Flow Balance Across Zurich",
    subtitle = paste0("Classification of ", nrow(data_kpi3), " counting sites based on dominant direction share"),
    caption = "Strong = >60% | Moderate = 50-60% | Balanced = <50%"
  ) +
  theme_void(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    plot.subtitle = element_text(size = 10, color = "grey30", hjust = 0.5),
    plot.caption = element_text(size = 9, color = "grey50", hjust = 0.5),
    legend.position = "bottom",
    legend.title = element_blank()
  )

print(p_kpi3_pie)

Chart 2: Top 15 Imbalanced Sites

top_imbalanced <- data_kpi3 %>%
  filter(Classification == "Strong corridor dependency") %>%
  arrange(desc(`Dominance Share`)) %>%
  slice_head(n = 15)

p_kpi3_top <- ggplot(top_imbalanced, 
                     aes(x = reorder(`Counting Site Name`, `Dominance Share`), 
                         y = `Dominance Share`,
                         fill = `Dominant Direction`)) +
  geom_col() +
  geom_text(aes(label = paste0(round(`Dominance Share` * 100, 1), "%")), 
            hjust = -0.1, size = 3) +
  scale_y_continuous(labels = percent, limits = c(0, 1), 
                    expand = expansion(mult = c(0, 0.08))) +
  scale_fill_brewer(palette = "Set2", name = "Dominant\nDirection") +
  labs(
    title = "Top 15 Sites with Strongest Directional Imbalance",
    subtitle = "Sites where one direction dominates traffic flow (>60% of total volume)",
    x = "",
    y = "Dominance Share (% of Total Traffic)",
    caption = "Data period: 2023-2025 | Percentage labels show share of dominant direction"
  ) +
  coord_flip() +
  theme_minimal(base_size = 11) +
  theme(
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(size = 10, color = "grey30"),
    legend.position = "right",
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )

print(p_kpi3_top)

Chart 3: Dominance Distribution

p_kpi3_hist <- ggplot(data_kpi3, aes(x = `Dominance Share`, fill = Classification)) +
  geom_histogram(bins = 20, color = "white", alpha = 0.8) +
  geom_vline(xintercept = c(0.5, 0.6), linetype = "dashed", 
             color = "black", linewidth = 0.7) +
  annotate("text", x = 0.55, y = Inf, label = "Moderate\nThreshold", 
           vjust = 1.5, size = 3, fontface = "italic") +
  annotate("text", x = 0.65, y = Inf, label = "Strong\nThreshold", 
           vjust = 1.5, size = 3, fontface = "italic") +
  scale_x_continuous(labels = percent) +
  scale_fill_manual(
    values = c(
      "Strong corridor dependency" = "#D62828",
      "Moderate directional preference" = "#F77F00",
      "Balanced intersection" = "#90BE6D"
    )
  ) +
  labs(
    title = "Distribution of Directional Dominance",
    subtitle = "How traffic is distributed across directions at all counting sites",
    x = "Dominance Share (% of Traffic in Dominant Direction)",
    y = "Number of Sites",
    caption = "Vertical lines mark classification thresholds at 50% and 60%"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 10, color = "grey30"),
    legend.position = "bottom",
    legend.title = element_blank(),
    panel.grid.minor = element_blank()
  )

print(p_kpi3_hist)

Save KPI 3

ggsave(file.path(output_path, "kpi3_classification_distribution.png"), 
       plot = p_kpi3_pie, width = 8, height = 8, dpi = 300, bg = "white")

ggsave(file.path(output_path, "kpi3_top_imbalanced_sites.png"), 
       plot = p_kpi3_top, width = 11, height = 8, dpi = 300, bg = "white")

ggsave(file.path(output_path, "kpi3_dominance_distribution.png"), 
       plot = p_kpi3_hist, width = 10, height = 6, dpi = 300, bg = "white")

Summary

cat("============================================================================\n")
## ============================================================================
cat("VISUALIZATION GENERATION COMPLETE\n")
## VISUALIZATION GENERATION COMPLETE
cat("============================================================================\n")
## ============================================================================
cat("Total charts generated: 8\n")
## Total charts generated: 8
cat("Output directory:", output_path, "\n\n")
## Output directory: screenshots
generated_files <- list.files(output_path, pattern = "^kpi.*\\.png$", full.names = FALSE)
cat("Generated files:\n")
## Generated files:
for(file in generated_files) {
  cat(paste0("  ✓ ", file, "\n"))
}
##   ✓ kpi0_population_traffic_trends.png
##   ✓ kpi1_stress_index_by_district.png
##   ✓ kpi1_stress_index_by_quarter.png
##   ✓ kpi2_bottleneck_by_hour.png
##   ✓ kpi2_top_bottlenecks.png
##   ✓ kpi3_classification_distribution.png
##   ✓ kpi3_dominance_distribution.png
##   ✓ kpi3_top_imbalanced_sites.png
cat("\n✓ All visualizations ready for report!\n")
## 
## ✓ All visualizations ready for report!